package cj.web.rss.util;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.jsoup.parser.XmlTreeBuilder;
import org.jsoup.select.Elements;

import cj.utils.DateUtils;
import cj.utils.HttpClientUtil;
import cj.utils.SHA1Util;
import cj.web.rss.domain.RssNews;

/**
 * Helpers for the Xinhua News Agency RSS interface: builds the signed request URL,
 * fetches the feed, and parses the channel header and the individual items.
 *
 * NOTE(review): appId/appKey look like real credentials committed to source control;
 * consider loading them from external configuration instead.
 */
public class RssUtil {
	
	public static final String appId="ce6edc6707544583";// application id
	public static final String appKey="1667e2b56b2e4ff2b5a77f3d6f716fc9";// application key
	public static final String channelId="1";// id of the subscribed channel
	public static final String projectId="1";// needed when several machines crawl; must be unique per machine

	/**
	 * Manual smoke test: fetches the feed once and parses its items.
	 * Nothing is printed; a failure surfaces as an exception.
	 */
	public static void main(String[] args) throws Exception
	{
		analysisRssItem(invokeRss());
	}
	
	/**
	 * Builds the request signature by hashing
	 * {@code appId=<appId>&appKey=<appKey>&timeStamp=<timeStamp>} with
	 * {@link SHA1Util#getSha1(String)}.
	 *
	 * @param timeStamp	timestamp that is also sent with the request
	 * @return	the value returned by {@code SHA1Util.getSha1} for the string above
	 */
	public static String returnSign(long timeStamp)
	{
		String str = "appId="+appId+"&appKey="+appKey+"&timeStamp="+timeStamp;
		return SHA1Util.getSha1(str);
	}
	
	/**
	 * Calls the Xinhua News Agency RSS interface.
	 * The request carries {@link #channelId}, {@link #appId}, a fresh timestamp,
	 * the signature from {@link #returnSign(long)} and {@link #projectId}.
	 *
	 * @return	whatever {@code HttpClientUtil.get} returns for the request URL
	 * 			(presumably the RSS XML; its behaviour on failure is not visible here)
	 */
	public static String invokeRss()
	{
		// The same timestamp must be used for the signature and for the query string.
		long timeStamp=DateUtils.dateToUnixTimestamp();
		String sign = returnSign(timeStamp);
		String url = "http://pub.zhongguowangshi.com/rss?channelId="+channelId+
				"&appId="+appId+"&timeStamp="+timeStamp+"&sign="+sign+"&projectId="+projectId;
		return HttpClientUtil.get(url, "utf-8");
	}
	
	/**
	 * Parses the channel header of an RSS document.
	 *
	 * @param responseRss	RSS content; {@code null} is treated as an empty document
	 * @return	a map of String to String with the following keys. A key whose tag
	 * 			(or the whole {@code channel} element) is missing maps to an empty string.
	 * title=>media name / site channel name
	 * description=>media / site channel description
	 * language=>language, e.g. zh-CN
	 * pubDate=>publication time of the RSS feed
	 * generator=>name of the program that generated the feed
	 */
	public static Map analysisRssHead(String responseRss)
	{
		Document doc = parseXml(responseRss);
		// first() is null when the document has no <channel>; firstText copes with that.
		Element channel = doc.getElementsByTag("channel").first();
		
		Map<String, String> rep = new HashMap<String, String>();
		rep.put("title", firstText(channel, "title"));
		rep.put("description", firstText(channel, "description"));
		rep.put("language", firstText(channel, "language"));
		rep.put("pubDate", firstText(channel, "pubDate"));
		rep.put("generator", firstText(channel, "generator"));
		
		return rep;
	}
	
	/**
	 * Parses every {@code item} element of an RSS document into an {@link RssNews}.
	 * Optional elements may be absent: text fields then become empty strings, dates
	 * stay empty, and {@code setEvacuate}/{@code setProductId} are not called so the
	 * bean keeps its own default for those fields.
	 *
	 * @param responseRss	RSS content; {@code null} is treated as an empty document
	 * @return	one entry per item (never {@code null}), populated from:
	 * title=>article title
	 * evacuate=>manuscript status (0: normal, 2: revised, 3: withdrawn)
	 * link=>absolute URL of the article
	 * description=>article description / body
	 * source=>article source
	 * pubDate=>article publication time
	 * updateDate=>article update time
	 * productName=>product line name
	 * productId=>product code (attribute of productName)
	 * videoImage=>absolute path of the video preview image (attribute of videoUrl)
	 * videoUrl=>absolute path of the video
	 * author=>name of the article source
	 * @throws NumberFormatException	if evacuate or productId is present but not an integer
	 * @throws IllegalArgumentException	if a non-empty date cannot be parsed by {@link Date}
	 */
	public static List<RssNews> analysisRssItem(String responseRss)
	{
		Document doc = parseXml(responseRss);
		Elements items = doc.getElementsByTag("item");
		
		List<RssNews> rssNewslis = new ArrayList<RssNews>();
		
		for (Element item : items)
		{
			String evacuate = item.getElementsByTag("evacuate").text();
			// productId and videoImage are attributes of elements that may be absent.
			String productId = firstAttr(item, "productName", "productId");
			String videoImage = firstAttr(item, "videoUrl", "image");
			
			RssNews rssNews = new RssNews();
			rssNews.setTitle(item.getElementsByTag("title").text());
			if (!isBlank(evacuate))
			{
				rssNews.setEvacuate(Integer.parseInt(evacuate.trim()));
			}
			rssNews.setLink(item.getElementsByTag("link").text());
			rssNews.setDescription(item.getElementsByTag("description").text());
			rssNews.setSource(item.getElementsByTag("source").text());
			rssNews.setPubDate(formatRssDate(item.getElementsByTag("pubDate").text()));
			rssNews.setUpdateDate(formatRssDate(item.getElementsByTag("updateDate").text()));
			rssNews.setProductName(item.getElementsByTag("productName").text());
			if (!isBlank(productId))
			{
				rssNews.setProductId(Integer.parseInt(productId.trim()));
			}
			rssNews.setVideoImage(videoImage);
			rssNews.setVideoUrl(item.getElementsByTag("videoUrl").text());
			rssNews.setAuthor(item.getElementsByTag("author").text());
			rssNewslis.add(rssNews);
		}
		
		return rssNewslis;
	}
	
	/** Parses the text with jsoup's XML tree builder; {@code null} is parsed as an empty document. */
	private static Document parseXml(String xml)
	{
		return Jsoup.parse(xml == null ? "" : xml, "", new Parser(new XmlTreeBuilder()));
	}
	
	/** Returns the text of the first descendant with the given tag, or "" when parent or tag is missing. */
	private static String firstText(Element parent, String tag)
	{
		if (parent == null)
		{
			return "";
		}
		Element first = parent.getElementsByTag(tag).first();
		return first == null ? "" : first.text();
	}
	
	/** Returns an attribute of the first element matching the selector, or "" when nothing matches. */
	private static String firstAttr(Element parent, String cssQuery, String attribute)
	{
		Element first = parent.select(cssQuery).first();
		return first == null ? "" : first.attr(attribute);
	}
	
	/** True for {@code null}, empty, or whitespace-only strings. */
	private static boolean isBlank(String value)
	{
		return value == null || value.trim().length() == 0;
	}
	
	/**
	 * Re-formats a feed date with {@code DateUtils.DATE_FULL_STR}; a blank input yields "".
	 * The deprecated {@code Date(String)} constructor is kept on purpose so that the set of
	 * accepted input formats stays exactly what it was.
	 */
	@SuppressWarnings("deprecation")
	private static String formatRssDate(String raw)
	{
		if (isBlank(raw))
		{
			return "";
		}
		return DateUtils.getDateTime2Str(new Date(raw), DateUtils.DATE_FULL_STR);
	}
}
